% Encoding: UTF-8


@Article{DBeck_JFoster2015-B,
  Title                    = {{Seed}: a user-friendly tool for exploring and visualizing microbial community data},
  Author                   = {Beck, Daniel and Dennis, Christopher and Foster, James A.},
  Journal                  = {Bioinformatics},
  Year                     = {2015},
  Month                    = feb,
  Number                   = {4},
  Pages                    = {602--603},
  Volume                   = {31},
  Abstract                 = {In this article we present Simple Exploration of Ecological Data (Seed), a data exploration tool for microbial communities. Seed is written in R using the Shiny library. This provides access to powerful R-based functions and libraries through a simple user interface. Seed allows users to explore ecological datasets using principal coordinate analyses, scatter plots, bar plots, hierarchal clustering and heatmaps. Seed is open source and available at https://github.com/danlbek/Seed. Contact: danlbek@gmail.com. Supplementary data are available at Bioinformatics online.},
  Doi                      = {10.1093/bioinformatics/btu693},
  File                     = {Published version:DBeck_JFoster2015-B.pdf:PDF},
  Institution              = {Department of Biological Sciences, University of Idaho, Moscow, ID 83844, USA.},
  Keywords                 = {Bacteria, classification/genetics; Computational Biology, methods; Computer Graphics; Ecology; Software},
  Language                 = {eng},
  Medline-pst              = {ppublish},
  Owner                    = {fbreitwieser},
  Pii                      = {btu693},
  Pmid                     = {25332377},
  Timestamp                = {2015.11.18},
  Url                      = {https://doi.org/10.1093/bioinformatics/btu693}
}

@Article{MBostock_JHeer2011-ITVCG,
  author      = {Bostock, Michael and Ogievetsky, Vadim and Heer, Jeffrey},
  title       = {{D\textsuperscript{3}}: {Data-Driven Documents}},
  journal     = {IEEE Trans Vis Comput Graph},
  year        = {2011},
  volume      = {17},
  number      = {12},
  pages       = {2301--2309},
  month       = dec,
  abstract    = {Data-Driven Documents (D3) is a novel representation-transparent approach to visualization for the web. Rather than hide the underlying scenegraph within a toolkit-specific abstraction, D3 enables direct inspection and manipulation of a native representation: the standard document object model (DOM). With D3, designers selectively bind input data to arbitrary document elements, applying dynamic transforms to both generate and modify content. We show how representational transparency improves expressiveness and better integrates with developer tools than prior approaches, while offering comparable notational efficiency and retaining powerful declarative components. Immediate evaluation of operators further simplifies debugging and allows iterative development. Additionally, we demonstrate how D3 transforms naturally enable animation and interaction with dramatic performance improvements over intermediate representations.},
  doi         = {10.1109/TVCG.2011.185},
  institution = {Computer Science Department of Stanford University, Stanford, CA 94305, USA. mbostock@stanford.edu},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {fbreitwieser},
  pmid        = {22034350},
  timestamp   = {2015.11.24},
  url         = {https://doi.org/10.1109/TVCG.2011.185},
}

@Article{BBuchfink_DHuson2015-NM,
  Title                    = {Fast and sensitive protein alignment using {DIAMOND}},
  Author                   = {Buchfink, Benjamin and Xie, Chao and Huson, Daniel H.},
  Journal                  = {Nat Methods},
  Year                     = {2015},
  Month                    = jan,
  Number                   = {1},
  Pages                    = {59--60},
  Volume                   = {12},
  Abstract                 = {The alignment of sequencing reads against a protein reference database is a major computational bottleneck in metagenomics and data-intensive evolutionary projects. Although recent tools offer improved performance over the gold standard BLASTX, they exhibit only a modest speedup or low sensitivity. We introduce DIAMOND, an open-source algorithm based on double indexing that is 20,000 times faster than BLASTX on short reads and has a similar degree of sensitivity.},
  Doi                      = {10.1038/nmeth.3176},
  File                     = {Published version:BBuchfink_DHuson2015-NM.pdf:PDF},
  Institution              = {Singapore Centre on Environmental Life Sciences Engineering, School of Biological Sciences, Nanyang Technological University, Singapore.},
  Keywords                 = {Algorithms; Base Sequence; Humans; Metagenomics, methods; Microbiota, genetics; Sensitivity and Specificity; Sequence Alignment, methods; Sequence Analysis, DNA; Software},
  Language                 = {eng},
  Medline-pst              = {ppublish},
  Owner                    = {fbreitwieser},
  Pii                      = {nmeth.3176},
  Pmid                     = {25402007},
  Timestamp                = {2015.11.19},
  Url                      = {https://doi.org/10.1038/nmeth.3176}
}

@Article{AByrd_WJohnson2014-BB,
  Title                    = {{Clinical PathoScope}: rapid alignment and filtration for accurate pathogen identification in clinical samples using unassembled sequencing data},
  Author                   = {Byrd, Allyson L. and Perez-Rogers, Joseph F. and Manimaran, Solaiappan and Castro-Nallar, Eduardo and Toma, Ian and McCaffrey, Tim and Siegel, Marc and Benson, Gary and Crandall, Keith A. and Johnson, William Evan},
  Journal                  = {BMC Bioinformatics},
  Year                     = {2014},
  Pages                    = {262},
  Volume                   = {15},
  Abstract                 = {The use of sequencing technologies to investigate the microbiome of a sample can positively impact patient healthcare by providing therapeutic targets for personalized disease treatment. However, these samples contain genomic sequences from various sources that complicate the identification of pathogens. Here we present Clinical PathoScope, a pipeline to rapidly and accurately remove host contamination, isolate microbial reads, and identify potential disease-causing pathogens. We have accomplished three essential tasks in the development of Clinical PathoScope. First, we developed an optimized framework for pathogen identification using a computational subtraction methodology in concordance with read trimming and ambiguous read reassignment. Second, we have demonstrated the ability of our approach to identify multiple pathogens in a single clinical sample, accurately identify pathogens at the subspecies level, and determine the nearest phylogenetic neighbor of novel or highly mutated pathogens using real clinical sequencing data. Finally, we have shown that Clinical PathoScope outperforms previously published pathogen identification methods with regard to computational speed, sensitivity, and specificity. Clinical PathoScope is the only pathogen identification method currently available that can identify multiple pathogens from mixed samples and distinguish between very closely related species and strains in samples with very few reads per pathogen. Furthermore, Clinical PathoScope does not rely on genome assembly and thus can more rapidly complete the analysis of a clinical sample when compared with current assembly-based methods. Clinical PathoScope is freely available at: http://sourceforge.net/projects/pathoscope/.},
  Doi                      = {10.1186/1471-2105-15-262},
  File                     = {Published version:AByrd_WJohnson2014-BB.pdf:PDF},
  Institution              = {Department of Bioinformatics, Boston University, Boston, MA, USA. kcrandall@gwu.edu.},
  Keywords                 = {Base Sequence; Computational Biology, methods; Host-Pathogen Interactions; Humans; Microbiological Techniques, methods; Phylogeny; Sequence Alignment, methods; Sequence Analysis, methods; Species Specificity; Time Factors},
  Language                 = {eng},
  Medline-pst              = {epublish},
  Owner                    = {fbreitwieser},
  Pii                      = {1471-2105-15-262},
  Pmid                     = {25091138},
  Timestamp                = {2015.11.19},
  Url                      = {https://doi.org/10.1186/1471-2105-15-262}
}

@Article{BHoffmann_MBeer2012-EID,
  Title                    = {Novel Orthobunyavirus in Cattle, {Europe}, 2011},
  Author                   = {Hoffmann, Bernd and Scheuch, Matthias and H{\"{o}}per, Dirk and Jungblut, Ralf and Holsteg, Mark and Schirrmeier, Horst and Eschbaumer, Michael and Goller, Katja V. and Wernike, Kerstin and Fischer, Melina and Breithaupt, Angele and Mettenleiter, Thomas C. and Beer, Martin},
  Journal                  = {Emerg Infect Dis},
  Year                     = {2012},
  Month                    = mar,
  Number                   = {3},
  Pages                    = {469--472},
  Volume                   = {18},
  Abstract                 = {In 2011, an unidentified disease in cattle was reported in Germany and the Netherlands. Clinical signs included fever, decreased milk production, and diarrhea. Metagenomic analysis identified a novel orthobunyavirus, which subsequently was isolated from blood of affected animals. Surveillance was initiated to test malformed newborn animals in the affected region.},
  Doi                      = {10.3201/eid1803.111905},
  File                     = {Published version:BHoffmann_MBeer2012-EID.pdf:PDF},
  Institution              = {Friedrich-Loeffler-Institut, Greifswald--Insel Riems, Germany.},
  Keywords                 = {Animals; Bunyaviridae Infections, epidemiology/veterinary/virology; Cattle; Cattle Diseases, epidemiology/virology; Cell Line; Cricetinae; Disease Outbreaks, veterinary; Germany, epidemiology; Netherlands, epidemiology; Nucleocapsid Proteins, genetics; Orthobunyavirus, classification/genetics/isolation /&/ purification; Phylogeny; Real-Time Polymerase Chain Reaction},
  Language                 = {eng},
  Medline-pst              = {ppublish},
  Owner                    = {fbreitwieser},
  Pmid                     = {22376991},
  Timestamp                = {2015.11.19},
  Url                      = {https://doi.org/10.3201/eid1803.111905}
}

@Article{PKitts_AKimchi2016-NAR,
  author      = {Kitts, Paul A. and Church, Deanna M. and Thibaud-Nissen, Fran{\c{c}}oise and Choi, Jinna and Hem, Vichet and Sapojnikov, Victor and Smith, Robert G. and Tatusova, Tatiana and Xiang, Charlie and Zherikov, Andrey and DiCuccio, Michael and Murphy, Terence D. and Pruitt, Kim D. and Kimchi, Avi},
  title       = {Assembly: a resource for assembled genomes at {NCBI}},
  journal     = {Nucleic Acids Res},
  year        = {2016},
  volume      = {44},
  number      = {D1},
  pages       = {D73--D80},
  month       = jan,
  abstract    = {The NCBI Assembly database (www.ncbi.nlm.nih.gov/assembly/) provides stable accessioning and data tracking for genome assembly data. The model underlying the database can accommodate a range of assembly structures, including sets of unordered contig or scaffold sequences, bacterial genomes consisting of a single complete chromosome, or complex structures such as a human genome with modeled allelic variation. The database provides an assembly accession and version to unambiguously identify the set of sequences that make up a particular version of an assembly, and tracks changes to updated genome assemblies. The Assembly database reports metadata such as assembly names, simple statistical reports of the assembly (number of contigs and scaffolds, contiguity metrics such as contig N50, total sequence length and total gap length) as well as the assembly update history. The Assembly database also tracks the relationship between an assembly submitted to the International Nucleotide Sequence Database Consortium (INSDC) and the assembly represented in the NCBI RefSeq project. Users can find assemblies of interest by querying the Assembly Resource directly or by browsing available assemblies for a particular organism. Links in the Assembly Resource allow users to easily download sequence and annotations for current versions of genome assemblies from the NCBI genomes FTP site.},
  doi         = {10.1093/nar/gkv1226},
  institution = {National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA.},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {fbreitwieser},
  pii         = {gkv1226},
  pmc         = {PMC4702866},
  pmid        = {26578580},
  timestamp   = {2016.04.06},
  url         = {https://doi.org/10.1093/nar/gkv1226},
}

@Article{PMcMurdie_SHolmes2013-PO,
  Title                    = {{phyloseq}: an {R} package for reproducible interactive analysis and graphics of microbiome census data},
  Author                   = {McMurdie, Paul J. and Holmes, Susan},
  Journal                  = {PLoS One},
  Year                     = {2013},
  Number                   = {4},
  Pages                    = {e61217},
  Volume                   = {8},
  Abstract                 = {The analysis of microbial communities through DNA sequencing brings many challenges: the integration of different types of data with methods from ecology, genetics, phylogenetics, multivariate statistics, visualization and testing. With the increased breadth of experimental designs now being pursued, project-specific statistical analyses are often needed, and these analyses are often difficult (or impossible) for peer researchers to independently reproduce. The vast majority of the requisite tools for performing these analyses reproducibly are already implemented in R and its extensions (packages), but with limited support for high throughput microbiome census data. Here we describe a software project, phyloseq, dedicated to the object-oriented representation and analysis of microbiome census data in R. It supports importing data from a variety of common formats, as well as many analysis techniques. These include calibration, filtering, subsetting, agglomeration, multi-table comparisons, diversity analysis, parallelized Fast UniFrac, ordination methods, and production of publication-quality graphics; all in a manner that is easy to document, share, and modify. We show how to apply functions from other R packages to phyloseq-represented data, illustrating the availability of a large number of open source analysis techniques. We discuss the use of phyloseq with tools for reproducible research, a practice common in other fields but still rare in the analysis of highly parallel microbiome census data. We have made available all of the materials necessary to completely reproduce the analysis and figures included in this article, an example of best practices for reproducible research. The phyloseq project for R is a new open-source software package, freely available on the web from both GitHub and Bioconductor.},
  Doi                      = {10.1371/journal.pone.0061217},
  File                     = {Published version:PMcMurdie_SHolmes2013-PO.pdf:PDF},
  Institution              = {Department of Statistics, Stanford University, Stanford, California, United States of America.},
  Keywords                 = {Data Interpretation, Statistical; Humans; Metagenome; Multivariate Analysis; Phylogeny; Principal Component Analysis; Sequence Analysis, DNA; Software},
  Language                 = {eng},
  Medline-pst              = {epublish},
  Owner                    = {fbreitwieser},
  Pii                      = {PONE-D-12-31789},
  Pmid                     = {23630581},
  Timestamp                = {2015.11.19},
  Url                      = {https://doi.org/10.1371/journal.pone.0061217}
}

@Article{PMcMurdie_SHolmes2015-B,
  Title                    = {{Shiny-phyloseq}: Web application for interactive microbiome analysis with provenance tracking},
  Author                   = {McMurdie, Paul J. and Holmes, Susan},
  Journal                  = {Bioinformatics},
  Year                     = {2015},
  Month                    = jan,
  Number                   = {2},
  Pages                    = {282--283},
  Volume                   = {31},
  Abstract                 = {We have created a Shiny-based Web application, called Shiny-phyloseq, for dynamic interaction with microbiome data that runs on any modern Web browser and requires no programming, increasing the accessibility and decreasing the entrance requirement to using phyloseq and related R tools. Along with a data- and context-aware dynamic interface for exploring the effects of parameter and method choices, Shiny-phyloseq also records the complete user input and subsequent graphical results of a user's session, allowing the user to archive, share and reproduce the sequence of steps that created their result---without writing any new code themselves. Shiny-phyloseq is implemented entirely in the R language. It can be hosted/launched by any system with R installed, including Windows, Mac OS and most Linux distributions. Information technology administrators can also host Shiny-phyloseq from a remote server, in which case users need only have a Web browser installed. Shiny-phyloseq is provided free of charge under a GPL-3 open-source license through GitHub at http://joey711.github.io/shiny-phyloseq/.},
  Doi                      = {10.1093/bioinformatics/btu616},
  File                     = {Published version:PMcMurdie_SHolmes2015-B.pdf:PDF},
  Institution              = {Department of Statistics, Stanford University, Stanford, CA 94305, USA.},
  Keywords                 = {Computer Graphics; Data Interpretation, Statistical; Humans; Microbiota; Phylogeny; Software; Web Browser},
  Language                 = {eng},
  Medline-pst              = {ppublish},
  Owner                    = {fbreitwieser},
  Pii                      = {btu616},
  Pmid                     = {25262154},
  Timestamp                = {2015.11.18},
  Url                      = {https://doi.org/10.1093/bioinformatics/btu616}
}

@Article{SMerchant_SSalzberg2014-P,
  Title                    = {Unexpected cross-species contamination in genome sequencing projects},
  Author                   = {Merchant, Samier and Wood, Derrick E. and Salzberg, Steven L.},
  Journal                  = {PeerJ},
  Year                     = {2014},
  Pages                    = {e675},
  Volume                   = {2},
  Abstract                 = {The raw data from a genome sequencing project sometimes contains DNA from contaminating organisms, which may be introduced during sample collection or sequence preparation. In some instances, these contaminants remain in the sequence even after assembly and deposition of the genome into public databases. As a result, searches of these databases may yield erroneous and confusing results. We used efficient microbiome analysis software to scan the draft assembly of domestic cow, Bos taurus, and identify 173 small contigs that appeared to derive from microbial contaminants. In the course of verifying these findings, we discovered that one genome, Neisseria gonorrhoeae TCDC-NG08107, although putatively a complete genome, contained multiple sequences that actually derived from the cow and sheep genomes. Our findings illustrate the need to carefully validate findings of anomalous DNA that rely on comparisons to either draft or finished genomes.},
  Doi                      = {10.7717/peerj.675},
  File                     = {Published version:SMerchant_SSalzberg2014-P.pdf:PDF},
  Institution              = {D , USA ; Department of Computer Science, Johns Hopkins University , USA ; Department of Biomedical Engineering, Johns Hopkins University , USA.},
  Language                 = {eng},
  Medline-pst              = {epublish},
  Owner                    = {fbreitwieser},
  Pii                      = {675},
  Pmid                     = {25426337},
  Timestamp                = {2015.11.20},
  Url                      = {https://doi.org/10.7717/peerj.675}
}

@Article{SNaccache_CChiu2014-GR,
  Title                    = {A cloud-compatible bioinformatics pipeline for ultrarapid pathogen identification from next-generation sequencing of clinical samples},
  Author                   = {Naccache, Samia N. and Federman, Scot and Veeraraghavan, Narayanan and Zaharia, Matei and Lee, Deanna and Samayoa, Erik and Bouquet, Jerome and Greninger, Alexander L. and Luk, Ka-Cheung and Enge, Barryett and Wadford, Debra A. and Messenger, Sharon L. and Genrich, Gillian L. and Pellegrino, Kristen and Grard, Gilda and Leroy, Eric and Schneider, Bradley S. and Fair, Joseph N. and Mart{\'{\i}}nez, Miguel A. and Isa, Pavel and Crump, John A. and DeRisi, Joseph L. and Sittler, Taylor and Hackett, Jr, John and Miller, Steve and Chiu, Charles Y.},
  Journal                  = {Genome Res},
  Year                     = {2014},
  Month                    = jul,
  Number                   = {7},
  Pages                    = {1180--1192},
  Volume                   = {24},
  Abstract                 = {Unbiased next-generation sequencing (NGS) approaches enable comprehensive pathogen detection in the clinical microbiology laboratory and have numerous applications for public health surveillance, outbreak investigation, and the diagnosis of infectious diseases. However, practical deployment of the technology is hindered by the bioinformatics challenge of analyzing results accurately and in a clinically relevant timeframe. Here we describe SURPI ("sequence-based ultrarapid pathogen identification"), a computational pipeline for pathogen identification from complex metagenomic NGS data generated from clinical samples, and demonstrate use of the pipeline in the analysis of 237 clinical samples comprising more than 1.1 billion sequences. Deployable on both cloud-based and standalone servers, SURPI leverages two state-of-the-art aligners for accelerated analyses, SNAP and RAPSearch, which are as accurate as existing bioinformatics tools but orders of magnitude faster in performance. In fast mode, SURPI detects viruses and bacteria by scanning data sets of 7-500 million reads in 11 min to 5 h, while in comprehensive mode, all known microorganisms are identified, followed by de novo assembly and protein homology searches for divergent viruses in 50 min to 16 h. SURPI has also directly contributed to real-time microbial diagnosis in acutely ill patients, underscoring its potential key role in the development of unbiased NGS-based clinical assays in infectious diseases that demand rapid turnaround times.},
  Doi                      = {10.1101/gr.171934.113},
  File                     = {Published version:SNaccache_CChiu2014-GR.pdf:PDF},
  Institution              = {Department of Laboratory Medicine, UCSF, San Francisco, California 94107, USA; UCSF-Abbott Viral Diagnostics and Discovery Center, San Francisco, California 94107, USA; Department of Medicine, Division of Infectious Diseases, UCSF, San Francisco, California 94143, USA.},
  Keywords                 = {Computational Biology, methods; Databases, Nucleic Acid; High-Throughput Nucleotide Sequencing; Humans; Metagenomics, methods; ROC Curve; Reproducibility of Results; Software},
  Language                 = {eng},
  Medline-pst              = {ppublish},
  Owner                    = {fbreitwieser},
  Pii                      = {gr.171934.113},
  Pmid                     = {24899342},
  Timestamp                = {2015.11.19},
  Url                      = {https://doi.org/10.1101/gr.171934.113}
}

@Article{SNaccache_CChiu2013-JV,
  author      = {Naccache, Samia N. and Greninger, Alexander L. and Lee, Deanna and Coffey, Lark L. and Phan, Tung and Rein-Weston, Annie and Aronsohn, Andrew and Hackett, Jr, John and Delwart, Eric L. and Chiu, Charles Y.},
  title       = {The perils of pathogen discovery: origin of a novel parvovirus-like hybrid genome traced to nucleic acid extraction spin columns},
  journal     = {J Virol},
  year        = {2013},
  volume      = {87},
  number      = {22},
  pages       = {11966--11977},
  month       = nov,
  abstract    = {Next-generation sequencing was used for discovery and de novo assembly of a novel, highly divergent DNA virus at the interface between the Parvoviridae and Circoviridae. The virus, provisionally named parvovirus-like hybrid virus (PHV), is nearly identical by sequence to another DNA virus, NIH-CQV, previously detected in Chinese patients with seronegative (non-A-E) hepatitis. Although we initially detected PHV in a wide range of clinical samples, with all strains sharing $\sim$99\% nucleotide and amino acid identity with each other and with NIH-CQV, the exact origin of the virus was eventually traced to contaminated silica-binding spin columns used for nucleic acid extraction. Definitive confirmation of the origin of PHV, and presumably NIH-CQV, was obtained by in-depth analyses of water eluted through contaminated spin columns. Analysis of environmental metagenome libraries detected PHV sequences in coastal marine waters of North America, suggesting that a potential association between PHV and diatoms (algae) that generate the silica matrix used in the spin columns may have resulted in inadvertent viral contamination during manufacture. The confirmation of PHV/NIH-CQV as laboratory reagent contaminants and not bona fide infectious agents of humans underscores the rigorous approach needed to establish the validity of new viral genomes discovered by next-generation sequencing.},
  doi         = {10.1128/JVI.02323-13},
  institution = {Department of Laboratory Medicine, University of California, San Francisco, California, USA.},
  keywords    = {Chimera; Circoviridae Infections, genetics/virology; Circoviridae, genetics; DNA, Viral, genetics/isolation /&/ purification; Genome, Viral; High-Throughput Nucleotide Sequencing; Humans; Parvoviridae Infections, genetics/virology; Parvovirus, classification/genetics/isolation /&/ purification; Phylogeny},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {fbreitwieser},
  pii         = {JVI.02323-13},
  pmc         = {PMC3807889},
  pmid        = {24027301},
  timestamp   = {2016.04.06},
  url         = {https://doi.org/10.1128/JVI.02323-13},
}

@Article{NOLeary_KPruitt2016-NAR,
  author      = {O'Leary, Nuala A. and Wright, Mathew W. and Brister, J Rodney and Ciufo, Stacy and Haddad, Diana and McVeigh, Rich and Rajput, Bhanu and Robbertse, Barbara and Smith-White, Brian and Ako-Adjei, Danso and Astashyn, Alexander and Badretdin, Azat and Bao, Yiming and Blinkova, Olga and Brover, Vyacheslav and Chetvernin, Vyacheslav and Choi, Jinna and Cox, Eric and Ermolaeva, Olga and Farrell, Catherine M. and Goldfarb, Tamara and Gupta, Tripti and Haft, Daniel and Hatcher, Eneida and Hlavina, Wratko and Joardar, Vinita S. and Kodali, Vamsi K. and Li, Wenjun and Maglott, Donna and Masterson, Patrick and McGarvey, Kelly M. and Murphy, Michael R. and O'Neill, Kathleen and Pujar, Shashikant and Rangwala, Sanjida H. and Rausch, Daniel and Riddick, Lillian D. and Schoch, Conrad and Shkeda, Andrei and Storz, Susan S. and Sun, Hanzhen and Thibaud-Nissen, Fran{\c{c}}oise and Tolstoy, Igor and Tully, Raymond E. and Vatsan, Anjana R. and Wallin, Craig and Webb, David and Wu, Wendy and Landrum, Melissa J. and Kimchi, Avi and Tatusova, Tatiana and DiCuccio, Michael and Kitts, Paul and Murphy, Terence D. and Pruitt, Kim D.},
  title       = {Reference sequence ({RefSeq}) database at {NCBI}: current status, taxonomic expansion, and functional annotation},
  journal     = {Nucleic Acids Res},
  year        = {2016},
  volume      = {44},
  number      = {D1},
  pages       = {D733--D745},
  month       = jan,
  abstract    = {The RefSeq project at the National Center for Biotechnology Information (NCBI) maintains and curates a publicly available database of annotated genomic, transcript, and protein sequence records (http://www.ncbi.nlm.nih.gov/refseq/). The RefSeq project leverages the data submitted to the International Nucleotide Sequence Database Collaboration (INSDC) against a combination of computation, manual curation, and collaboration to produce a standard set of stable, non-redundant reference sequences. The RefSeq project augments these reference sequences with current knowledge including publications, functional features and informative nomenclature. The database currently represents sequences from more than 55 000 organisms (>4800 viruses, >40 000 prokaryotes and >10 000 eukaryotes; RefSeq release 71), ranging from a single record to complete genomes. This paper summarizes the current status of the viral, prokaryotic, and eukaryotic branches of the RefSeq project, reports on improvements to data access and details efforts to further expand the taxonomic representation of the collection. We also highlight diverse functional curation initiatives that support multiple uses of RefSeq data including taxonomic validation, genome annotation, comparative genomics, and clinical testing. We summarize our approach to utilizing available RNA-Seq and other data types in our manual curation process for vertebrate, plant, and other species, and describe a new direction for prokaryotic genomes and protein name management.},
  doi         = {10.1093/nar/gkv1189},
  institution = {National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Building 38A, 8600 Rockville Pike, Bethesda, MD 20894, USA pruitt@ncbi.nlm.nih.gov.},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {fbreitwieser},
  pii         = {gkv1189},
  pmc         = {PMC4702849},
  pmid        = {26553804},
  timestamp   = {2016.04.06},
  url         = {https://doi.org/10.1093/nar/gkv1189},
}

@Article{BOndov_APhillippy2011-BB,
  author      = {Ondov, Brian D. and Bergman, Nicholas H. and Phillippy, Adam M.},
  title       = {Interactive metagenomic visualization in a {Web} browser.},
  journal     = {BMC Bioinformatics},
  year        = {2011},
  volume      = {12},
  pages       = {385},
  abstract    = {A critical output of metagenomic studies is the estimation of abundances of taxonomical or functional groups. The inherent uncertainty in assignments to these groups makes it important to consider both their hierarchical contexts and their prediction confidence. The current tools for visualizing metagenomic data, however, omit or distort quantitative hierarchical relationships and lack the facility for displaying secondary variables. Here we present Krona, a new visualization tool that allows intuitive exploration of relative abundances and confidences within the complex hierarchies of metagenomic classifications. Krona combines a variant of radial, space-filling displays with parametric coloring and interactive polar-coordinate zooming. The HTML5 and JavaScript implementation enables fully interactive charts that can be explored with any modern Web browser, without the need for installed software or plug-ins. This Web-based architecture also allows each chart to be an independent document, making them easy to share via e-mail or post to a standard Web server. To illustrate Krona's utility, we describe its application to various metagenomic data sets and its compatibility with popular metagenomic analysis tools. Krona is both a powerful metagenomic visualization tool and a demonstration of the potential of HTML5 for highly accessible bioinformatic visualizations. Its rich and interactive displays facilitate more informed interpretations of metagenomic analyses, while its implementation as a browser-based application makes it extremely portable and easily adopted into existing analysis packages. Both the Krona rendering code and conversion tools are freely available under a BSD open-source license, and available from: http://krona.sourceforge.net.},
  doi         = {10.1186/1471-2105-12-385},
  file        = {Published version:BOndov_APhillippy2011-BB.pdf:PDF},
  institution = {National Biodefense Analysis and Countermeasures Center, 110 Thomas Johnson Drive, Frederick, MD 21702, USA. ondovb@nbacc.net},
  keywords    = {Computational Biology; Gastrointestinal Tract, microbiology; Humans; Internet; Metagenomics, methods; Software},
  language    = {eng},
  medline-pst = {epublish},
  owner       = {fbreitwieser},
  pii         = {1471-2105-12-385},
  pmid        = {21961884},
  timestamp   = {2015.11.19},
  url         = {http://dx.doi.org/10.1186/1471-2105-12-385},
}

@Article{SSalter_AWalker2014-BB,
  author      = {Salter, Susannah J. and Cox, Michael J. and Turek, Elena M. and Calus, Szymon T. and Cookson, William O. and Moffatt, Miriam F. and Turner, Paul and Parkhill, Julian and Loman, Nicholas J. and Walker, Alan W.},
  title       = {Reagent and laboratory contamination can critically impact sequence-based microbiome analyses.},
  journal     = {BMC Biol},
  year        = {2014},
  volume      = {12},
  pages       = {87},
  abstract    = {The study of microbial communities has been revolutionised in recent years by the widespread adoption of culture independent analytical techniques such as 16S rRNA gene sequencing and metagenomics. One potential confounder of these sequence-based approaches is the presence of contamination in DNA extraction kits and other laboratory reagents. In this study we demonstrate that contaminating DNA is ubiquitous in commonly used DNA extraction kits and other laboratory reagents, varies greatly in composition between different kits and kit batches, and that this contamination critically impacts results obtained from samples containing a low microbial biomass. Contamination impacts both PCR-based 16S rRNA gene surveys and shotgun metagenomics. We provide an extensive list of potential contaminating genera, and guidelines on how to mitigate the effects of contamination. These results suggest that caution should be advised when applying sequence-based techniques to the study of microbiota present in low biomass environments. Concurrent sequencing of negative control samples is strongly advised.},
  doi         = {10.1186/s12915-014-0087-z},
  file        = {SSalter_AWalker2014-BB.pdf:home/fbreitwieser/MyLibrary/MetagenomeContamination/SSalter_AWalker2014-BB.pdf:PDF},
  institution = {Pathogen Genomics Group, Wellcome Trust Sanger Institute, Hinxton, UK. sb18@sanger.ac.uk.},
  keywords    = {DNA Contamination; Indicators and Reagents, analysis; Laboratories; Metagenomics; Microbiota; Polymerase Chain Reaction; RNA, Ribosomal, 16S, analysis; Salmonella, genetics; Sequence Analysis, DNA},
  language    = {eng},
  medline-pst = {epublish},
  owner       = {fbreitwieser},
  pii         = {s12915-014-0087-z},
  pmid        = {25387460},
  timestamp   = {2015.05.28},
  url         = {http://dx.doi.org/10.1186/s12915-014-0087-z},
}

% NOTE(review): this record looks like a placeholder: the authors are given by
% surname only and there is no volume, pages or DOI. It is presumably superseded
% by the published article stored under the key SSalzberg_CPardo2016-NNN in this
% file; confirm before citing this key.
@Article{SSalzberg_CPardo2015-STM,
  Title                    = {Brain infections},
  Author                   = {Salzberg and Breitwieser and Pardo},
  Journal                  = {Science Translational Medicine},
  Year                     = {2015},

  Owner                    = {fbreitwieser},
  Timestamp                = {2015.11.19}
}

@InProceedings{BShneiderman1996-IEEE,
  author       = {Shneiderman, Ben},
  title        = {The eyes have it: A task by data type taxonomy for information visualizations},
  booktitle    = {IEEE Symposium on Visual Languages},
  year         = {1996},
  pages        = {336--343},
  organization = {IEEE},
  owner        = {fbreitwieser},
  timestamp    = {2015.11.18},
}

@Article{DTruong_NSegata2015-NM,
  author      = {Truong, Duy Tin and Franzosa, Eric A. and Tickle, Timothy L. and Scholz, Matthias and Weingart, George and Pasolli, Edoardo and Tett, Adrian and Huttenhower, Curtis and Segata, Nicola},
  title       = {{MetaPhlAn2} for enhanced metagenomic taxonomic profiling.},
  journal     = {Nat Methods},
  year        = {2015},
  volume      = {12},
  number      = {10},
  pages       = {902--903},
  month       = sep,
  doi         = {10.1038/nmeth.3589},
  file        = {Published version:DTruong_NSegata2015-NM.pdf:PDF},
  institution = {Centre for Integrative Biology, University of Trento, Trento, Italy.},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {fbreitwieser},
  pii         = {nmeth.3589},
  pmid        = {26418763},
  timestamp   = {2015.11.19},
  url         = {http://dx.doi.org/10.1038/nmeth.3589},
}

@Article{JWhite_MPop2009-PCB,
  author      = {White, James Robert and Nagarajan, Niranjan and Pop, Mihai},
  title       = {Statistical methods for detecting differentially abundant features in clinical metagenomic samples.},
  journal     = {PLoS Comput Biol},
  year        = {2009},
  volume      = {5},
  number      = {4},
  pages       = {e1000352},
  month       = apr,
  abstract    = {Numerous studies are currently underway to characterize the microbial communities inhabiting our world. These studies aim to dramatically expand our understanding of the microbial biosphere and, more importantly, hope to reveal the secrets of the complex symbiotic relationship between us and our commensal bacterial microflora. An important prerequisite for such discoveries are computational tools that are able to rapidly and accurately compare large datasets generated from complex bacterial communities to identify features that distinguish them. We present a statistical method for comparing clinical metagenomic samples from two treatment populations on the basis of count data (e.g. as obtained through sequencing) to detect differentially abundant features. Our method, Metastats, employs the false discovery rate to improve specificity in high-complexity environments, and separately handles sparsely-sampled features using Fisher's exact test. Under a variety of simulations, we show that Metastats performs well compared to previously used methods, and significantly outperforms other methods for features with sparse counts. We demonstrate the utility of our method on several datasets including a 16S rRNA survey of obese and lean human gut microbiomes, COG functional profiles of infant and mature gut microbiomes, and bacterial and viral metabolic subsystem data inferred from random sequencing of 85 metagenomes. The application of our method to the obesity dataset reveals differences between obese and lean subjects not reported in the original study. For the COG and subsystem datasets, we provide the first statistically rigorous assessment of the differences between these populations. The methods described in this paper are the first to address clinical metagenomic datasets comprising samples from multiple subjects. Our methods are robust across datasets of varied complexity and sampling level. While designed for metagenomic applications, our software can also be applied to digital gene expression studies (e.g. SAGE). A web server implementation of our methods and freely available source code can be found at http://metastats.cbcb.umd.edu/.},
  doi         = {10.1371/journal.pcbi.1000352},
  file        = {Published version:JWhite_MPop2009-PCB.pdf:PDF},
  institution = {Applied Mathematics and Scientific Computation Program, Center for Bioinformatics and Computational Biology, University of Maryland, College Park, Maryland, United States of America.},
  keywords    = {Bacteria, classification/genetics/isolation /&/ purification; Chromosome Mapping, methods; DNA, Bacterial, genetics; Gene Expression Profiling, methods; Humans; Intestines, microbiology; Obesity, microbiology},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {fbreitwieser},
  pmid        = {19360128},
  timestamp   = {2015.12.07},
  url         = {http://dx.doi.org/10.1371/journal.pcbi.1000352},
}

@Article{MWilson_CChiu2014-NEJM,
  author      = {Wilson, Michael R. and Naccache, Samia N. and Samayoa, Erik and Biagtan, Mark and Bashir, Hiba and Yu, Guixia and Salamat, Shahriar M. and Somasekar, Sneha and Federman, Scot and Miller, Steve and Sokolic, Robert and Garabedian, Elizabeth and Candotti, Fabio and Buckley, Rebecca H. and Reed, Kurt D. and Meyer, Teresa L. and Seroogy, Christine M. and Galloway, Renee and Henderson, Sheryl L. and Gern, James E. and DeRisi, Joseph L. and Chiu, Charles Y.},
  title       = {Actionable diagnosis of neuroleptospirosis by next-generation sequencing.},
  journal     = {N Engl J Med},
  year        = {2014},
  volume      = {370},
  number      = {25},
  pages       = {2408--2417},
  month       = jun,
  abstract    = {A 14-year-old boy with severe combined immunodeficiency presented three times to a medical facility over a period of 4 months with fever and headache that progressed to hydrocephalus and status epilepticus necessitating a medically induced coma. Diagnostic workup including brain biopsy was unrevealing. Unbiased next-generation sequencing of the cerebrospinal fluid identified 475 of 3,063,784 sequence reads (0.016\%) corresponding to leptospira infection. Clinical assays for leptospirosis were negative. Targeted antimicrobial agents were administered, and the patient was discharged home 32 days later with a status close to his premorbid condition. Polymerase-chain-reaction (PCR) and serologic testing at the Centers for Disease Control and Prevention (CDC) subsequently confirmed evidence of Leptospira santarosai infection.},
  doi         = {10.1056/NEJMoa1401268},
  file        = {Published version:MWilson_CChiu2014-NEJM.pdf:PDF},
  institution = {From the Departments of Biochemistry and Biophysics (M.R.W., J.L.D.), Neurology (M.R.W.), and Laboratory Medicine (S.N.N., E.S., G.Y., S.S., S.F., S.M., C.Y.C.), and the Department of Medicine, Division of Infectious Diseases (C.Y.C.), University of California, San Francisco (UCSF), and UCSF-Abbott Viral Diagnostics and Discovery Center (S.N.N., E.S., G.Y., S.S., S.F., S.M., C.Y.C.) - both in San Francisco; the Department of Medicine, Division of Allergy and Immunology (M.B., H.B., J.E.G.), and the Departments of Pathology and Laboratory Medicine (S.M.S., K.D.R.) and Pediatrics (T.L.M., C.M.S., S.L.H., J.E.G.), University of Wisconsin, Madison; the Experimental Transplantation and Immunology Branch, Center for Cancer Research, National Cancer Institute, National Institutes of Health, Bethesda, MD (R.S., E.G., F.C.); the Departments of Pediatrics and Immunology, Division of Allergy and Immunology, Duke University, Durham, NC (R.H.B.); and the Centers for Disease Control and Prevention, Atlanta (R.G.).},
  keywords    = {Adenosine Deaminase, deficiency; Adolescent; Agammaglobulinemia, complications; Biopsy; Brain, pathology; Cerebrospinal Fluid, microbiology; DNA, Bacterial, analysis; Fever, etiology; Headache, etiology; Humans; Leptospira, genetics/isolation /&/ purification; Leptospirosis, complications/diagnosis/microbiology; Male; Meningoencephalitis, complications/diagnosis/microbiology; Sequence Analysis, DNA, methods; Severe Combined Immunodeficiency, complications},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {fbreitwieser},
  pmid        = {24896819},
  timestamp   = {2015.11.19},
  url         = {http://dx.doi.org/10.1056/NEJMoa1401268},
}

@Article{DWood_SSalzberg2014-GB,
  author      = {Wood, Derrick E. and Salzberg, Steven L.},
  title       = {Kraken: ultrafast metagenomic sequence classification using exact alignments.},
  journal     = {Genome Biol},
  year        = {2014},
  volume      = {15},
  number      = {3},
  pages       = {R46},
  abstract    = {Kraken is an ultrafast and highly accurate program for assigning taxonomic labels to metagenomic DNA sequences. Previous programs designed for this task have been relatively slow and computationally expensive, forcing researchers to use faster abundance estimation programs, which only classify small subsets of metagenomic data. Using exact alignment of k-mers, Kraken achieves classification accuracy comparable to the fastest BLAST program. In its fastest mode, Kraken classifies 100 base pair reads at a rate of over 4.1 million reads per minute, 909 times faster than Megablast and 11 times faster than the abundance estimation program MetaPhlAn. Kraken is available at http://ccb.jhu.edu/software/kraken/.},
  doi         = {10.1186/gb-2014-15-3-r46},
  file        = {Published version:DWood_SSalzberg2014-GB.pdf:PDF},
  keywords    = {Archaea, classification/genetics; Bacteria, classification/genetics; Classification; Humans; Metagenome; Metagenomics, methods; Sensitivity and Specificity; Sequence Alignment, methods; Sequence Analysis, DNA, methods; Software},
  language    = {eng},
  medline-pst = {epublish},
  owner       = {fbreitwieser},
  pii         = {gb-2014-15-3-r46},
  pmid        = {24580807},
  timestamp   = {2015.11.19},
  url         = {http://dx.doi.org/10.1186/gb-2014-15-3-r46},
}

@Article{VSubramanyam_MMondal2015-ECM,
  author    = {Subramanyam, Veena and Paramshivan, Deepak and Kumar, Amit and Mondal, Md. Alam Hossain},
  title     = {Using {Sankey} diagrams to map energy flow from primary fuel to end use},
  journal   = {Energy Conversion and Management},
  year      = {2015},
  volume    = {91},
  pages     = {342--352},
  month     = feb,
  doi       = {10.1016/j.enconman.2014.12.024},
  owner     = {fbreitwieser},
  publisher = {Elsevier {BV}},
  timestamp = {2016.05.18},
  url       = {http://dx.doi.org/10.1016/j.enconman.2014.12.024},
}

@Article{EAfshinnekoo_CMason2015-CS,
  author      = {Afshinnekoo, Ebrahim and Meydan, Cem and Chowdhury, Shanin and Jaroudi, Dyala and Boyer, Collin and Bernstein, Nick and Maritz, Julia M. and Reeves, Darryl and Gandara, Jorge and Chhangawala, Sagar and Ahsanuddin, Sofia and Simmons, Amber and Nessel, Timothy and Sundaresh, Bharathi and Pereira, Elizabeth and Jorgensen, Ellen and Kolokotronis, Sergios-Orestis and Kirchberger, Nell and Garcia, Isaac and Gandara, David and Dhanraj, Sean and Nawrin, Tanzina and Saletore, Yogesh and Alexander, Noah and Vijay, Priyanka and H{\'{e}}naff, Elizabeth M. and Zumbo, Paul and Walsh, Michael and O'Mullan, Gregory D. and Tighe, Scott and Dudley, Joel T. and Dunaif, Anya and Ennis, Sean and O'Halloran, Eoghan and Magalhaes, Tiago R. and Boone, Braden and Jones, Angela L. and Muth, Theodore R. and Paolantonio, Katie Schneider and Alter, Elizabeth and Schadt, Eric E. and Garbarino, Jeanne and Prill, Robert J. and Carlton, Jane M. and Levy, Shawn and Mason, Christopher E.},
  title       = {Geospatial Resolution of Human and Bacterial Diversity with City-Scale Metagenomics.},
  journal     = {Cell Syst},
  year        = {2015},
  volume      = {1},
  number      = {1},
  pages       = {72--87},
  month       = jul,
  abstract    = {The panoply of microorganisms and other species present in our environment influence human health and disease, especially in cities, but have not been profiled with metagenomics at a city-wide scale. We sequenced DNA from surfaces across the entire New York City (NYC) subway system, the Gowanus Canal, and public parks. Nearly half of the DNA (48\%) does not match any known organism; identified organisms spanned 1,688 bacterial, viral, archaeal, and eukaryotic taxa, which were enriched for harmless genera associated with skin (e.g., Acinetobacter). Predicted ancestry of human DNA left on subway surfaces can recapitulate U.S. Census demographic data, and bacterial signatures can reveal a station's history, such as marine-associated bacteria in a hurricane-flooded station. Some evidence of pathogens was found (Bacillus anthracis), but a lack of reported cases in NYC suggests that the pathogens represent a normal, urban microbiome. This baseline metagenomic map of NYC could help long-term disease surveillance, bioterrorism threat mitigation, and health management in the built environment of cities.},
  doi         = {10.1016/j.cels.2015.01.001},
  institution = {Weill Cornell Medical College, New York, NY 10065, USA ; The HRH Prince Alwaleed Bin Talal Bin Abdulaziz Alsaud Institute for Computational Biomedicine, Weill Cornell Medical College, New York, NY 10065, USA ; The Feil Family Brain and Mind Research Institute, New York, NY 10065, USA.},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {fbreitwieser},
  pmc         = {PMC4651444},
  pmid        = {26594662},
  timestamp   = {2016.05.19},
  url         = {http://dx.doi.org/10.1016/j.cels.2015.01.001},
}

@Manual{ShinyPackage,
  author    = {Winston Chang and Joe Cheng and JJ Allaire and Yihui Xie and Jonathan McPherson},
  title     = {shiny: Web Application Framework for R},
  note      = {R package version 0.13.2},
  year      = {2016},
  url       = {https://CRAN.R-project.org/package=shiny},
  owner     = {fbreitwieser},
  timestamp = {2016.06.02},
}

@Article{BLangmead_SSalzberg2012-NM,
  author      = {Langmead, Ben and Salzberg, Steven L.},
  title       = {Fast gapped-read alignment with {Bowtie} 2.},
  journal     = {Nat Methods},
  year        = {2012},
  volume      = {9},
  number      = {4},
  pages       = {357--359},
  month       = apr,
  abstract    = {As the rate of sequencing increases, greater throughput is demanded from read aligners. The full-text minute index is often used to make alignment very fast and memory-efficient, but the approach is ill-suited to finding longer, gapped alignments. Bowtie 2 combines the strengths of the full-text minute index with the flexibility and speed of hardware-accelerated dynamic programming algorithms to achieve a combination of high speed, sensitivity and accuracy.},
  doi         = {10.1038/nmeth.1923},
  institution = {Center for Bioinformatics and Computational Biology, Institute for Advanced Computer Studies, University of Maryland, College Park, Maryland, USA. blangmea@jhsph.edu},
  keywords    = {Algorithms; Computational Biology, methods; Databases, Genetic; Genome, Human, genetics; Humans; Sequence Alignment, methods; Sequence Analysis, DNA, methods},
  language    = {eng},
  medline-pst = {epublish},
  owner       = {fbreitwieser},
  pii         = {nmeth.1923},
  pmc         = {PMC3322381},
  pmid        = {22388286},
  timestamp   = {2016.06.02},
  url         = {http://dx.doi.org/10.1038/nmeth.1923},
}

@Manual{RPackage,
  author       = {{R Core Team}},
  title        = {R: A Language and Environment for Statistical Computing},
  year         = {2016},
  organization = {R Foundation for Statistical Computing},
  address      = {Vienna, Austria},
  url          = {https://www.R-project.org/},
  owner        = {fbreitwieser},
  timestamp    = {2016.06.02},
}

% NOTE(review): this record describes the same work as the entry keyed
% MBostock_JHeer2011-ITVCG earlier in this file. The key is kept because
% documents may cite either one; consider merging once citations are checked.
@Article{MBostock_JHeer2011-ITVCGa,
  author      = {Bostock, Michael and Ogievetsky, Vadim and Heer, Jeffrey},
  title       = {D\textsuperscript{3}: Data-Driven Documents.},
  journal     = {IEEE Trans Vis Comput Graph},
  year        = {2011},
  volume      = {17},
  number      = {12},
  pages       = {2301--2309},
  month       = dec,
  abstract    = {Data-Driven Documents (D3) is a novel representation-transparent approach to visualization for the web. Rather than hide the underlying scenegraph within a toolkit-specific abstraction, D3 enables direct inspection and manipulation of a native representation: the standard document object model (DOM). With D3, designers selectively bind input data to arbitrary document elements, applying dynamic transforms to both generate and modify content. We show how representational transparency improves expressiveness and better integrates with developer tools than prior approaches, while offering comparable notational efficiency and retaining powerful declarative components. Immediate evaluation of operators further simplifies debugging and allows iterative development. Additionally, we demonstrate how D3 transforms naturally enable animation and interaction with dramatic performance improvements over intermediate representations.},
  doi         = {10.1109/TVCG.2011.185},
  institution = {Computer Science Department of Stanford University, Stanford, CA 94305, USA. mbostock@stanford.edu},
  language    = {eng},
  medline-pst = {ppublish},
  owner       = {fbreitwieser},
  pmid        = {22034350},
  timestamp   = {2016.06.02},
  url         = {http://dx.doi.org/10.1109/TVCG.2011.185},
}

% The doi/url fields describe the Genome Research article named in `journal`;
% the bioRxiv preprint is recorded through eprint/eprinttype.
@Article{DKim_SSalzber2016-Biorxiv,
  author     = {Kim, Daehwan and Song, Li and Breitwieser, Florian P and Salzberg, Steven L},
  title      = {Centrifuge: rapid and sensitive classification of metagenomic sequences},
  journal    = {Genome Research},
  year       = {2016},
  volume     = {26},
  number     = {12},
  pages      = {1721--1729},
  month      = dec,
  abstract   = {Centrifuge is a novel microbial classification engine that enables rapid, accurate and sensitive labeling of reads and quantification of species on desktop computers. The system uses an indexing scheme based on the Burrows-Wheeler transform (BWT) and the Ferragina-Manzini (FM) index, optimized specifically for the metagenomic classification problem. Centrifuge requires a relatively small index (4.2 GB for 4,078 bacterial and 200 archaeal genomes) and classifies sequences at very high speed, allowing it to process the millions of reads from a typical high-throughput DNA sequencing run within a few minutes. Together these advances enable timely and accurate analysis of large metagenomics data sets on conventional desktop computers. Because of its space-optimized indexing schemes, Centrifuge also makes it possible to index the entire NCBI non-redundant nucleotide sequence database (a total of 109 billion bases) with an index size of 69 GB, in contrast to k-mer based indexing schemes, which require far more extensive space. Centrifuge is available as free, open-source software from http://www.ccb.jhu.edu/software/centrifuge.},
  doi        = {10.1101/gr.210641.116},
  eprint     = {10.1101/054965},
  eprinttype = {bioRxiv},
  owner      = {fbreitwieser},
  publisher  = {Cold Spring Harbor Labs Journals},
  timestamp  = {2016.07.27},
  url        = {http://genome.cshlp.org/content/early/2016/10/17/gr.210641.116?top=1},
}

@Article{SFlygare_RSchlaberg2016-GB,
  author      = {Flygare, Steven and Simmon, Keith and Miller, Chase and Qiao, Yi and Kennedy, Brett and {Di Sera}, Tonya and Graf, Erin H. and Tardif, Keith D. and Kapusta, Aur{\'{e}}lie and Rynearson, Shawn and Stockmann, Chris and Queen, Krista and Tong, Suxiang and Voelkerding, Karl V. and Blaschke, Anne and Byington, Carrie L. and Jain, Seema and Pavia, Andrew and Ampofo, Krow and Eilbeck, Karen and Marth, Gabor and Yandell, Mark and Schlaberg, Robert},
  title       = {Taxonomer: an interactive metagenomics analysis portal for universal pathogen detection and host {mRNA} expression profiling.},
  journal     = {Genome Biol},
  year        = {2016},
  volume      = {17},
  number      = {1},
  pages       = {111},
  abstract    = {High-throughput sequencing enables unbiased profiling of microbial communities, universal pathogen detection, and host response to infectious diseases. However, computation times and algorithmic inaccuracies have hindered adoption. We present Taxonomer, an ultrafast, web-tool for comprehensive metagenomics data analysis and interactive results visualization. Taxonomer is unique in providing integrated nucleotide and protein-based classification and simultaneous host messenger RNA (mRNA) transcript profiling. Using real-world case-studies, we show that Taxonomer detects previously unrecognized infections and reveals antiviral host mRNA expression profiles. To facilitate data-sharing across geographic distances in outbreak settings, Taxonomer is publicly available through a web-based user interface. Taxonomer enables rapid, accurate, and interactive analyses of metagenomics data on personal computers and mobile devices.},
  doi         = {10.1186/s13059-016-0969-1},
  institution = {ARUP Institute for Clinical and Experimental Pathology, Salt Lake City, UT, USA. robert.schlaberg@path.utah.edu.},
  language    = {eng},
  medline-pst = {epublish},
  owner       = {fbreitwieser},
  pii         = {10.1186/s13059-016-0969-1},
  pmc         = {PMC4880956},
  pmid        = {27224977},
  timestamp   = {2016.07.27},
  url         = {http://dx.doi.org/10.1186/s13059-016-0969-1},
}

@Article{SSalzberg_CPardo2016-NNN,
  author      = {Salzberg, Steven L. and Breitwieser, Florian P. and Kumar, Anupama and Hao, Haiping and Burger, Peter and Rodriguez, Fausto J. and Lim, Michael and Qui{\~{n}}ones-Hinojosa, Alfredo and Gallia, Gary L. and Tornheim, Jeffrey A. and Melia, Michael T. and Sears, Cynthia L. and Pardo, Carlos A.},
  title       = {Next-generation sequencing in neuropathologic diagnosis of infections of the nervous system.},
  journal     = {Neurol Neuroimmunol Neuroinflamm},
  year        = {2016},
  volume      = {3},
  number      = {4},
  pages       = {e251},
  month       = aug,
  abstract    = {To determine the feasibility of next-generation sequencing (NGS) microbiome approaches in the diagnosis of infectious disorders in brain or spinal cord biopsies in patients with suspected CNS infections. In a prospective pilot study, we applied NGS in combination with a new computational analysis pipeline to detect the presence of pathogenic microbes in brain or spinal cord biopsies from 10 patients with neurologic problems indicating possible infection but for whom conventional clinical and microbiology studies yielded negative or inconclusive results. Direct DNA and RNA sequencing of brain tissue biopsies generated 8.3 million to 29.1 million sequence reads per sample, which successfully identified with high confidence the infectious agent in 3 patients for whom validation techniques confirmed the pathogens identified by NGS. Although NGS was unable to identify with precision infectious agents in the remaining cases, it contributed to the understanding of neuropathologic processes in 5 others, demonstrating the power of large-scale unbiased sequencing as a novel diagnostic tool. Clinical outcomes were consistent with the findings yielded by NGS on the presence or absence of an infectious pathogenic process in 8 of 10 cases, and were noncontributory in the remaining 2. NGS-guided metagenomic studies of brain, spinal cord, or meningeal biopsies offer the possibility for dramatic improvements in our ability to detect (or rule out) a wide range of CNS pathogens, with potential benefits in speed, sensitivity, and cost. NGS-based microbiome approaches present a major new opportunity to investigate the potential role of infectious pathogens in the pathogenesis of neuroinflammatory disorders.},
  doi         = {10.1212/NXI.0000000000000251},
  institution = {ter Science, and Biostatistics (S.L.S.), Johns Hopkins University, Baltimore, MD.},
  language    = {eng},
  medline-pst = {epublish},
  owner       = {fbreitwieser},
  pii         = {NEURIMMINFL2016009183},
  pmc         = {PMC4907805},
  pmid        = {27340685},
  timestamp   = {2016.07.28},
  url         = {http://dx.doi.org/10.1212/NXI.0000000000000251},
}

% Lindgreen, Adair and Gardner 2016 (Scientific Reports): independent benchmark
% of metagenome analysis tools on complex, realistic data sets.
% The title carries no terminal period; the bibliography style supplies punctuation.
@Article{SLindgreen_PGardner2016-SR,
  author               = {Lindgreen, Stinus and Adair, Karen L and Gardner, Paul P},
  title                = {An evaluation of the accuracy and speed of metagenome analysis tools},
  journal              = {Scientific Reports},
  year                 = {2016},
  volume               = {6},
  pages                = {19233},
  __markedentry        = {[fbreitwieser:]},
  abstract             = {Metagenome studies are becoming increasingly widespread, yielding important insights into microbial communities covering diverse environments from terrestrial and aquatic ecosystems to human skin and gut. With the advent of high-throughput sequencing platforms, the use of large scale shotgun sequencing approaches is now commonplace. However, a thorough independent benchmark comparing state-of-the-art metagenome analysis tools is lacking. Here, we present a benchmark where the most widely used tools are tested on complex, realistic data sets. Our results clearly show that the most widely used tools are not necessarily the most accurate, that the most accurate tool is not necessarily the most time consuming, and that there is a high degree of variability between available tools. These findings are important as the conclusions of any metagenomics study are affected by errors in the predicted community composition and functional capacity. Data sets and results are freely available from http://www.ucbioinformatics.org/metabenchmark.html.},
  citation-subset      = {IM},
  country              = {England},
  created              = {2016-01-18},
  doi                  = {10.1038/srep19233},
  issn                 = {2045-2322},
  issn-linking         = {2045-2322},
  journal-abbreviation = {Sci Rep},
  nlm                  = {PMC4726098},
  nlm-id               = {101563288},
  owner                = {NLM},
  pmid                 = {26778510},
  pubmodel             = {Electronic},
  pubstatus            = {epublish},
  revised              = {2016-01-28},
  status               = {In-Process},
  timestamp            = {2016.10.13},
}

% Flygare et al. 2016 (Genome Biology): Taxonomer, a web-based metagenomics
% analysis tool for pathogen detection and host mRNA expression profiling.
% Braced title words keep their capitalisation under styles that sentence-case titles.
% The issue is stored in `number` (the field classic BibTeX styles read) and the
% month uses the predefined three-letter macro.
@Article{SFlygare_RSchlaberg2016-GBa,
  author               = {Flygare, Steven and Simmon, Keith and Miller, Chase and Qiao, Yi and Kennedy, Brett and Di Sera, Tonya and Graf, Erin H and Tardif, Keith D and Kapusta, Aur{\'e}lie and Rynearson, Shawn and Stockmann, Chris and Queen, Krista and Tong, Suxiang and Voelkerding, Karl V and Blaschke, Anne and Byington, Carrie L and Jain, Seema and Pavia, Andrew and Ampofo, Krow and Eilbeck, Karen and Marth, Gabor and Yandell, Mark and Schlaberg, Robert},
  title                = {Taxonomer: an interactive metagenomics analysis portal for universal pathogen detection and host {mRNA} expression profiling},
  journal              = {Genome Biology},
  year                 = {2016},
  volume               = {17},
  number               = {1},
  pages                = {111},
  month                = may,
  __markedentry        = {[fbreitwieser:6]},
  abstract             = {High-throughput sequencing enables unbiased profiling of microbial communities, universal pathogen detection, and host response to infectious diseases. However, computation times and algorithmic inaccuracies have hindered adoption. We present Taxonomer, an ultrafast, web-tool for comprehensive metagenomics data analysis and interactive results visualization. Taxonomer is unique in providing integrated nucleotide and protein-based classification and simultaneous host messenger RNA (mRNA) transcript profiling. Using real-world case-studies, we show that Taxonomer detects previously unrecognized infections and reveals antiviral host mRNA expression profiles. To facilitate data-sharing across geographic distances in outbreak settings, Taxonomer is publicly available through a web-based user interface. Taxonomer enables rapid, accurate, and interactive analyses of metagenomics data on personal computers and mobile devices.},
  citation-subset      = {IM},
  country              = {England},
  created              = {2016-5-26},
  doi                  = {10.1186/s13059-016-0969-1},
  issn                 = {1474-760X},
  issn-linking         = {1474-7596},
  journal-abbreviation = {Genome Biol},
  keywords             = {Infectious disease diagnostics; Metagenomics; Microbiome; Pathogen detection},
  nlm                  = {PMC4880956},
  nlm-id               = {100960660},
  owner                = {NLM},
  pmid                 = {27224977},
  pubmodel             = {Electronic},
  pubstatus            = {epublish},
  revised              = {2016-5-29},
  status               = {In-Data-Review},
  timestamp            = {2016.10.19},
}

% Graf et al. 2016 (Journal of Clinical Microbiology): comparison of RNA-seq-based
% metagenomics with a commercial respiratory virus PCR panel.
% Braced acronyms in the title survive styles that sentence-case titles.
% The issue is stored in `number` (the field classic BibTeX styles read) and the
% month uses the predefined three-letter macro.
@Article{EGraf_RSchlaberg2016-Jocm,
  author               = {Graf, Erin H and Simmon, Keith E and Tardif, Keith D and Hymas, Weston and Flygare, Steven and Eilbeck, Karen and Yandell, Mark and Schlaberg, Robert},
  title                = {Unbiased Detection of Respiratory Viruses by Use of {RNA} Sequencing-Based Metagenomics: a Systematic Comparison to a Commercial {PCR} Panel},
  journal              = {Journal of Clinical Microbiology},
  year                 = {2016},
  volume               = {54},
  number               = {4},
  pages                = {1000--1007},
  month                = apr,
  __markedentry        = {[fbreitwieser:6]},
  abstract             = {Current infectious disease molecular tests are largely pathogen specific, requiring test selection based on the patient's symptoms. For many syndromes caused by a large number of viral, bacterial, or fungal pathogens, such as respiratory tract infections, this necessitates large panels of tests and has limited yield. In contrast, next-generation sequencing-based metagenomics can be used for unbiased detection of any expected or unexpected pathogen. However, barriers for its diagnostic implementation include incomplete understanding of analytical performance and complexity of sequence data analysis. We compared detection of known respiratory virus-positive (n= 42) and unselected (n= 67) pediatric nasopharyngeal swabs using an RNA sequencing (RNA-seq)-based metagenomics approach and Taxonomer, an ultrarapid, interactive, web-based metagenomics data analysis tool, with an FDA-cleared respiratory virus panel (RVP; GenMark eSensor). Untargeted metagenomics detected 86% of known respiratory virus infections, and additional PCR testing confirmed RVP results for only 2 (33%) of the discordant samples. In unselected samples, untargeted metagenomics had excellent agreement with the RVP (93%). In addition, untargeted metagenomics detected an additional 12 viruses that were either not targeted by the RVP or missed due to highly divergent genome sequences. Normalized viral read counts for untargeted metagenomics correlated with viral burden determined by quantitative PCR and showed high intrarun and interrun reproducibility. Partial or full-length viral genome sequences were generated in 86% of RNA-seq-positive samples, allowing assessment of antiviral resistance, strain-level typing, and phylogenetic relatedness. Overall, untargeted metagenomics had high agreement with a sensitive RVP, detected viruses not targeted by the RVP, and yielded epidemiologically and clinically valuable sequence information.},
  citation-subset      = {IM},
  copyright            = {Copyright © 2016, American Society for Microbiology. All Rights Reserved.},
  country              = {United States},
  created              = {2016-3-26},
  doi                  = {10.1128/JCM.03060-15},
  issn                 = {1098-660X},
  issn-linking         = {0095-1137},
  journal-abbreviation = {J Clin Microbiol},
  nlm                  = {PMC4809917 [Available on 10/01/16]},
  nlm-id               = {7505564},
  owner                = {NLM},
  pmid                 = {26818672},
  pubmodel             = {Print-Electronic},
  pubstatus            = {ppublish},
  revised              = {2016-4-5},
  status               = {In-Process},
  timestamp            = {2016.10.19},
}

% Eren et al. 2015 (PeerJ): the Anvi'o analysis and visualization platform.
% The title uses ASCII quote ligatures (' and `) rather than Unicode curly quotes,
% matching the LaTeX-escaped accents already used in the author field.
% NOTE(review): the DOI was added from the PeerJ article number; confirm against the publisher record.
@Article{Meren_TDelmont-PeerJ2015,
  author    = {Eren, A Murat and Esen, {\"O}zcan C and Quince, Christopher and Vineis, Joseph H and Morrison, Hilary G and Sogin, Mitchell L and Delmont, Tom O},
  title     = {Anvi'o: an advanced analysis and visualization platform for `omics data},
  journal   = {PeerJ},
  year      = {2015},
  volume    = {3},
  pages     = {e1319},
  doi       = {10.7717/peerj.1319},
  owner     = {fbreitwieser},
  publisher = {PeerJ Inc.},
  timestamp = {2016.10.31},
}

% Paulson et al. 2013 (Nature Methods): differential abundance analysis for
% microbial marker-gene surveys.
% The third author's family name is the compound "Corrada Bravo"; the comma form
% keeps BibTeX from treating "Corrada" as a given name.
% NOTE(review): the DOI was added during cleanup; confirm against the publisher record.
@Article{JPaulsen_MPop-2013-NM,
  author    = {Paulson, Joseph N and Stine, O Colin and Corrada Bravo, H{\'e}ctor and Pop, Mihai},
  title     = {Differential abundance analysis for microbial marker-gene surveys},
  journal   = {Nature Methods},
  year      = {2013},
  volume    = {10},
  number    = {12},
  pages     = {1200--1202},
  doi       = {10.1038/nmeth.2658},
  owner     = {fbreitwieser},
  publisher = {Nature Publishing Group},
  timestamp = {2016.11.11},
}

% Breitwieser, Baker and Salzberg 2018 (Genome Biology): KrakenUniq, metagenomics
% classification using unique k-mer counts.
% The braces around the tool name stop sentence-casing styles from lowercasing its
% internal capital. Given names are spelled out so styles can abbreviate them.
% NOTE(review): full given names and the DOI were added during cleanup; confirm
% against the publisher record.
@Article{FBreitwieser_SSalzberg2018-GB,
  author    = {Breitwieser, Florian P and Baker, Daniel N and Salzberg, Steven L},
  title     = {{KrakenUniq}: confident and fast metagenomics classification using unique k-mer counts},
  journal   = {Genome Biology},
  year      = {2018},
  volume    = {19},
  number    = {1},
  pages     = {198},
  doi       = {10.1186/s13059-018-1568-0},
  publisher = {BioMed Central},
}

% JabRef metadata record: sets databaseType to bibtex. Keep the line below verbatim.
@Comment{jabref-meta: databaseType:bibtex;}
